# Replication Script for 
# "Coordinating Nominations: How to Deal with 
# an Incumbent Surplus after Electoral Reform"
# published in Japanese Journal of Political Science
# author: Jochen Rehmert

# Script replicates:
# Tables: 1, 8, 9, 10
# Figures: 2, 3

# directory
setwd("...")

# packages 
library(foreign);library(stargazer);library(MASS)
library(sandwich);library(lmtest);library(ggplot2)

# load data
dat <- read.dta("coordination1.dta")

#################################################
# Table 1: Candidate Nomination in New Districts#
#################################################
# get indicators for relevant districs
# for incumbent only
summary(nom.tmp.inc <- clogit(nominated ~    borninku + mainfaction+  exp_voteshare_cand  + strata(smd) , 
                              data = dat[dat$incumbent == 1 & dat$ldp_cand == 1 & dat$candidate_type != "new"  ,]))
sample.smds.inc <- names(which(table(dat[row.names(dat) %in% row.names(model.matrix(nom.tmp.inc)), "smd"]) != 1))

# for all candidates
summary(nom.tmp <- clogit(nominated ~    borninku + mainfaction+  exp_voteshare_cand + incumbent + strata(smd) , 
                          data = dat[dat$ldp_cand == 1 & dat$candidate_type != "new"  ,]))

sample.smds <- names(which(table(dat[row.names(dat) %in% row.names(model.matrix(nom.tmp)), "smd"]) != 1))

# Model 1
# all candidates
summary(nom.1 <- clogit(nominated ~   exp_voteshare_cand + age_rel + sen_rel  +  borninku + mainfaction + incumbent + strata(smd) , 
                        robust =TRUE, method = "efron" ,model = T,
                        data = dat[dat$ldp_cand == 1 & dat$sample == 1  & dat$smd %in% sample.smds  ,]))
# incumbents only
summary(rr.nom.1 <- clogit(nominated ~   exp_voteshare_cand + age_rel + sen_rel  +  borninku + mainfaction  + strata(smd) , 
                           robust =TRUE, method = "efron" , model = T,
                           data = dat[dat$incumbent == 1 & dat$ldp_cand == 1 & dat$sample == 1  & dat$smd %in% sample.smds.inc  ,]))
# robust for dynastic 
summary(rr.nom.1.r2 <- clogit(nominated ~   exp_voteshare_cand + age_rel + sen_rel  +  borninku + mainfaction  + pre_mp + strata(smd) , 
                              robust =TRUE, method = "efron" ,
                              data = dat[dat$incumbent == 1 & dat$ldp_cand == 1 & dat$sample == 1  & dat$smd %in% sample.smds.inc  ,]))

# Model 2
# all candidates
summary(nom.2 <- clogit(nominated ~  exp_voteshare_cand  + age_rel + sen_rel  +  borninku + mainfaction+   exp_deviation +  geo_area + incumbent + strata(smd)  , 
                        robust =TRUE, method = "efron" ,
                        data = dat[ dat$ldp_cand == 1 & dat$sample == 1  & dat$smd %in% sample.smds  ,]))
# incumbents only
summary(rr.nom.2 <- clogit(nominated ~  exp_voteshare_cand  + age_rel + sen_rel  +  borninku + mainfaction+   exp_deviation +  geo_area + strata(smd)  , 
                           robust =TRUE, method = "efron" ,
                           data = dat[dat$incumbent == 1 & dat$ldp_cand == 1 & dat$sample == 1  & dat$smd %in% sample.smds.inc  ,]))

# Model 3
# all candidates
summary(nom.3 <- clogit(nominated ~   exp_voteshare_cand + age_rel + sen_rel + borninku + mainfaction+     exp_deviation  *  geo_area + incumbent +  strata(smd)  , 
                        robust =TRUE, method = "efron" ,
                        data = dat[ dat$ldp_cand == 1 & dat$sample == 1  & dat$smd %in% sample.smds ,]))

# robust for share of people
summary(nom.3.r1 <- clogit(nominated ~   exp_voteshare_cand + age_rel + sen_rel + borninku + mainfaction + exp_deviation  *  share_newpop + incumbent +  strata(smd)  , 
                           robust =TRUE, method = "efron" ,model = T,
                           data = dat[dat$ldp_cand == 1 & dat$sample == 1  & dat$smd %in% sample.smds ,]))

# robust for unweighted deviation
summary(nom.3.r3 <- clogit(nominated ~   exp_voteshare_cand + age_rel + sen_rel + borninku + mainfaction + exp_deviation2 *  geo_area + incumbent +  strata(smd)  , 
                           robust =TRUE, method = "efron" ,model = T,
                           data = dat[dat$ldp_cand == 1 & dat$sample == 1  & dat$smd %in% sample.smds ,]))


# incumbenty only
summary(rr.nom.3 <- clogit(nominated ~   exp_voteshare_cand + age_rel + sen_rel + borninku + mainfaction+     exp_deviation  *  geo_area  +  strata(smd)  , 
                           robust =TRUE, method = "efron" ,model = T,
                           data = dat[dat$incumbent == 1 &  dat$ldp_cand == 1 & dat$sample == 1  & dat$smd %in% sample.smds.inc ,]))

# robust for share of people
summary(rr.nom.3.r1 <- clogit(nominated ~   exp_voteshare_cand + age_rel + sen_rel + borninku + mainfaction + exp_deviation  *  share_newpop  +  strata(smd)  , 
                              robust =TRUE, method = "efron" ,model = T,
                              data = dat[dat$incumbent == 1 &  dat$ldp_cand == 1 & dat$sample == 1  & dat$smd %in% sample.smds.inc ,]))
# robust for dynastic
summary(rr.nom.3.r2 <- clogit(nominated ~   exp_voteshare_cand + age_rel + sen_rel + borninku + mainfaction+  pre_mp +   exp_deviation  *  geo_area   + strata(smd)  , 
                              robust =TRUE, method = "efron" ,
                              data = dat[dat$incumbent == 1 &  dat$ldp_cand == 1 & dat$sample == 1  & dat$smd %in% sample.smds.inc ,]))

# robust for unweighted deviation
summary(rr.nom.3.r3 <- clogit(nominated ~   exp_voteshare_cand + age_rel + sen_rel + borninku + mainfaction+     exp_deviation2  *  geo_area   + strata(smd)  , 
                              robust =TRUE, method = "efron" ,
                              data = dat[dat$incumbent == 1 &  dat$ldp_cand == 1 & dat$sample == 1  & dat$smd %in% sample.smds.inc ,]))




## Table 1
stargazer(list(rr.nom.1, rr.nom.2, rr.nom.3, nom.1, nom.2, nom.3),
          se = list(summary(rr.nom.1)$coefficient[,4],
                    summary(rr.nom.2)$coefficient[,4],
                    summary(rr.nom.3)$coefficient[,4],
                    summary(nom.1)$coefficient[,4],
                    summary(nom.2)$coefficient[,4],
                    summary(nom.3)$coefficient[,4]),
          covariate.labels = c("Expected Candidate Voteshare","Age","Seniority", "Born in District","Mainstream Faction","Deviation from Median (Deviation)",
                               "Proportion of new Geographic Area (Area)", 
                               "Deviation $\\times$ Area", "Incumbent"),
          add.lines = list(c("Number of Districts", rr.nom.1$nevent, rr.nom.2$nevent, rr.nom.3$nevent, nom.1$nevent, nom.2$nevent, nom.3$nevent),
                           c("Number of Candidates",
                             length(unique(dat$name_jp[row.names(dat) %in% row.names(model.matrix(rr.nom.1))])),
                             length(unique(dat$name_jp[row.names(dat) %in% row.names(model.matrix(rr.nom.2))])),
                             length(unique(dat$name_jp[row.names(dat) %in% row.names(model.matrix(rr.nom.3))])), 
                             length(unique(dat$name_jp[row.names(dat) %in% row.names(model.matrix(nom.1))])),
                             length(unique(dat$name_jp[row.names(dat) %in% row.names(model.matrix(nom.2))])),
                             length(unique(dat$name_jp[row.names(dat) %in% row.names(model.matrix(nom.3))])))))


############################################
# Table 8: Controlling for Dynastic Status #
############################################
stargazer(list(rr.nom.1.r2, rr.nom.3.r2),
          se = list(summary(rr.nom.1.r2)$coefficient[,4],
                    summary(rr.nom.3.r2)$coefficient[,4]),
          covariate.labels = c("Expected Candidate Voteshare","Age","Seniority", "Born in District","Mainstream Faction",
                               "Dynastic Candidate","Deviation from Median (Deviation)",
                               "Proportion of new Geographic Area (Area)", 
                               "Deviation $\\times$ Area"),
          add.lines = list(c("Number of Districts", rr.nom.1.r2$nevent, rr.nom.3.r2$nevent),
                           c("Number of Candidates",length(unique(dat$name_jp[row.names(dat) %in% row.names(model.matrix(rr.nom.1.r2))])),
                             length(unique(dat$name_jp[row.names(dat) %in% row.names(model.matrix(rr.nom.3.r2))])))))

######################################################################
# Table 9: Controlling for Unweighted Ideology & Share of Population #
######################################################################
stargazer(list(rr.nom.3.r3 , rr.nom.3.r1 , nom.3.r3, nom.3.r1),
          se = list(summary(rr.nom.3.r3)$coefficient[,4],
                    summary(rr.nom.3.r1)$coefficient[,4],
                    summary(nom.3.r3)$coefficient[,4],
                    summary(nom.3.r1)$coefficient[,4]),
          covariate.labels = c("Expected Candidate Voteshare","Age","Seniority", "Born in District","Mainstream Faction","Deviation from Median, unweighted (Deviation W)",
                               "Proportion of new Geographic Area (Area)", 
                               "Incumbent",
                               "Deviation W $\\times$ Area", 
                               "Deviation from Median, (Deviation)",
                               "Share of New Electorate (New)", 
                               "Deviation $\\times$ New"),
          add.lines = list(c("Number of Districts", rr.nom.3.r3$nevent, rr.nom.3.r1$nevent, nom.3.r3$nevent, nom.3.r1$nevent),
                           c("Number of Candidates", 
                             length(unique(dat$name_jp[row.names(dat) %in% row.names(model.matrix(rr.nom.3.r3))])),
                             length(unique(dat$name_jp[row.names(dat) %in% row.names(model.matrix(rr.nom.3.r1))])),
                             length(unique(dat$name_jp[row.names(dat) %in% row.names(model.matrix(nom.3.r3))])),
                             length(unique(dat$name_jp[row.names(dat) %in% row.names(model.matrix(nom.3.r1))])))))

##################################################################
# Table 10: Electoral Results of Candidates Matched to Districts #
##################################################################

dat$result2 <- ifelse(dat$result == 1, 1, 0)

summary(won.1 <- glm(result2 ~ as.factor(prefecture) + exp_voteshare_cand , data = dat[row.names(dat) %in% row.names(model.matrix(nom.1)) & dat$nominated == 1, ]))
won.1.se <- sqrt(diag(cluster.vcov(won.1, ~ prefecture)))

summary(won.2 <- glm(result2 ~ as.factor(prefecture) + geo_area + exp_deviation+ exp_voteshare_cand , data = dat[row.names(dat) %in% row.names(model.matrix(nom.1)) & dat$nominated == 1, ]))
won.2.se <- sqrt(diag(cluster.vcov(won.2, ~ prefecture)))

summary(won.3 <- glm(result2 ~ as.factor(prefecture) + geo_area * exp_deviation+ exp_voteshare_cand , data = dat[row.names(dat) %in% row.names(model.matrix(nom.1)) & dat$nominated == 1, ]))
won.3.se <- sqrt(diag(cluster.vcov(won.3, ~ prefecture)))

summary(won.4 <- glm(result2 ~ as.factor(prefecture) + geo_area * exp_deviation+ exp_voteshare_cand , data = dat[row.names(dat) %in% row.names(model.matrix(nom.1)) & dat$nominated == 1 & dat$geo_area > 0, ]))
won.4.se <- sqrt(diag(cluster.vcov(won.4, ~ prefecture)))


#######################################
# Table 10
#######################################
stargazer(list(won.1, won.2, won.3, won.4,won.1, won.2, won.3, won.4), 
          se = list(won.1.se, won.2.se, won.3.se, won.4.se, NULL, NULL, NULL, NULL), 
          omit = "prefecture",
          add.lines = list(c("Prefecture Fixed-Effects", rep('$\\checkmark$' ,8)    )))


###########################################################################################
# Figure 2 Predicted Probabilitiy of District Nomination Conditional on Electoral Strength#
###########################################################################################
mod <- rr.nom.1
require(MASS)
set.seed(1104)
n.sim = 1000
b.sim <- mvrnorm(n.sim, coef(mod), vcov(mod))

x.cond <- seq(min(data.frame(model.matrix(mod))[['exp_voteshare_cand']] ),
              max(data.frame(model.matrix(mod))[['exp_voteshare_cand']] ),
              length.out = 5)
dat.tmp0.low <- c()
for(ii in 1:length(x.cond)){
  tmp <- data.frame(rbind(cbind(exp_voteshare_cand = x.cond[ii],
                                age_rel = 0 ,
                                sen_rel = 0,
                                borninku = 0,
                                0,
                                fold = ii),
                          cbind(exp_voteshare_cand = mean(data.frame(model.matrix(mod))[['exp_voteshare_cand']] ),
                                age_rel = 0,
                                sen_rel = 0,
                                borninku = 0,
                                0,
                                fold = ii),
                          cbind(exp_voteshare_cand = mean(data.frame(model.matrix(mod))[['exp_voteshare_cand']] ),
                                age_rel = 0,
                                sen_rel = 0,
                                borninku = 0,
                                0,
                                fold = ii)))
  dat.tmp0.low <- rbind(dat.tmp0.low, tmp)
}
lin.preds0.low <- as.matrix(dat.tmp0.low[,-6]) %*% t(b.sim)

sums.1 <- apply(lin.preds0.low[c(1:3),], 2,function(x) sum(exp(x), na.rm = T) )
sums.2 <- apply(lin.preds0.low[c(4:6),], 2,function(x) sum(exp(x), na.rm = T) )
sums.3 <- apply(lin.preds0.low[c(7:9),], 2,function(x) sum(exp(x), na.rm = T) )
sums.4 <- apply(lin.preds0.low[c(10:12),], 2,function(x) sum(exp(x), na.rm = T) )
sums.5 <- apply(lin.preds0.low[c(13:15),], 2,function(x) sum(exp(x), na.rm = T) )


probs.1 <- as.data.frame(exp(lin.preds0.low[1:3,1])/sums.1[1])
probs.2 <- as.data.frame(exp(lin.preds0.low[4:6,1])/sums.2[1])
probs.3 <- as.data.frame(exp(lin.preds0.low[7:9,1])/sums.3[1])
probs.4 <- as.data.frame(exp(lin.preds0.low[10:12,1])/sums.4[1])
probs.5 <- as.data.frame(exp(lin.preds0.low[13:15,1])/sums.5[1])

for(cc in 2:n.sim){
  
  probs.1 <- as.data.frame(cbind(probs.1, (exp(lin.preds0.low[1:3,cc])/sums.1[cc])))
  probs.2 <- as.data.frame(cbind(probs.2, (exp(lin.preds0.low[4:6,cc])/sums.2[cc])))
  probs.3 <- as.data.frame(cbind(probs.3, (exp(lin.preds0.low[7:9,cc])/sums.3[cc])))
  probs.4 <- as.data.frame(cbind(probs.4, (exp(lin.preds0.low[10:12,cc])/sums.4[cc])))
  probs.5 <- as.data.frame(cbind(probs.5, (exp(lin.preds0.low[13:15,cc])/sums.5[cc])))
}

dat.tmp0.low$pred <- NA
dat.tmp0.low$pred[1:3] <- apply(probs.1, 1,mean, na.rm = T) 
dat.tmp0.low$pred[4:6] <- apply(probs.2, 1,mean, na.rm = T) 
dat.tmp0.low$pred[7:9] <- apply(probs.3, 1,mean, na.rm = T) 
dat.tmp0.low$pred[10:12] <- apply(probs.4, 1,mean, na.rm = T) 
dat.tmp0.low$pred[13:15] <- apply(probs.5, 1,mean, na.rm = T) 
dat.tmp0.low$lower95 <- NA
dat.tmp0.low$lower95[1:3] <- apply(probs.1, 1,quantile, probs = 0.025, na.rm = T)
dat.tmp0.low$lower95[4:6] <- apply(probs.2, 1,quantile, probs = 0.025, na.rm = T)
dat.tmp0.low$lower95[7:9] <- apply(probs.3, 1,quantile, probs = 0.025, na.rm = T) 
dat.tmp0.low$lower95[10:12] <- apply(probs.4, 1,quantile, probs = 0.025, na.rm = T)
dat.tmp0.low$lower95[13:15] <- apply(probs.5, 1,quantile, probs = 0.025, na.rm = T)
dat.tmp0.low$upper95 <- NA
dat.tmp0.low$upper95[1:3] <- apply(probs.1, 1,quantile, probs = 0.975, na.rm = T)
dat.tmp0.low$upper95[4:6] <- apply(probs.2, 1,quantile, probs = 0.975, na.rm = T)
dat.tmp0.low$upper95[7:9] <- apply(probs.3, 1,quantile, probs = 0.975, na.rm = T) 
dat.tmp0.low$upper95[10:12] <- apply(probs.4, 1,quantile, probs = 0.975, na.rm = T)
dat.tmp0.low$upper95[13:15] <- apply(probs.5, 1,quantile, probs = 0.975, na.rm = T)
dat.tmp0.low$lower90 <- NA
dat.tmp0.low$lower90[1:3] <- apply(probs.1, 1,quantile, probs = 0.05, na.rm = T)
dat.tmp0.low$lower90[4:6] <- apply(probs.2, 1,quantile, probs = 0.05, na.rm = T)
dat.tmp0.low$lower90[7:9] <- apply(probs.3, 1,quantile, probs = 0.05, na.rm = T) 
dat.tmp0.low$lower90[10:12] <- apply(probs.4, 1,quantile, probs = 0.05, na.rm = T)
dat.tmp0.low$lower90[13:15] <- apply(probs.5, 1,quantile, probs = 0.05, na.rm = T)
dat.tmp0.low$upper90 <- NA
dat.tmp0.low$upper90[1:3] <- apply(probs.1, 1,quantile, probs = 0.95, na.rm = T)
dat.tmp0.low$upper90[4:6] <- apply(probs.2, 1,quantile, probs = 0.95, na.rm = T)
dat.tmp0.low$upper90[7:9] <- apply(probs.3, 1,quantile, probs = 0.95, na.rm = T) 
dat.tmp0.low$upper90[10:12] <- apply(probs.4, 1,quantile, probs = 0.95, na.rm = T)
dat.tmp0.low$upper90[13:15] <- apply(probs.5, 1,quantile, probs = 0.95, na.rm = T)


par(mfrow = c(1,3),oma=c(2,2,0,1),
    mar=c(0.5,2,0,1), 
    mai = c(.25,.7,.3,.3),
    omi = c(.5,.5,0,0))
plot(c(3:1) ~ dat.tmp0.low$pred[dat.tmp0.low$fold == 2] , main = "Voteshare 15%", cex.main = 1.25 , #ylim = c(0,.8), 
     xlim = c(-0.4,1), type = 'n',xlab = "t", ylab = "", yaxt = "n", xaxt = 'n', frame = F)
points(c(3:1) ~ dat.tmp0.low$pred[dat.tmp0.low$fold == 2] , pch = 19)
text(y = c(3 ), x = -0.05, par("usr")[1], labels = paste0("Candidate ",1 ), cex = 1.3, srt = 0, pos = 2, xpd = TRUE)
text(y = c(2:1), x = -0.05, par("usr")[1], labels = paste0("Candidate ",2:3, "", " \n Voteshare: 17%" ), cex = 1.3, srt = 1, pos = 2, xpd = TRUE)

axis(1, at = seq(0,1,0.2), labels =  seq(0,1,0.2), cex.axis = 1.5,
     col = "white")
segments(x0 = dat.tmp0.low$lower95[dat.tmp0.low$fold == 2], y0 = c(3:1), 
         x1 = dat.tmp0.low$upper95[dat.tmp0.low$fold == 2], y1 = c(3:1))

text(x = dat.tmp0.low[dat.tmp0.low$fold == 2 ,c("pred")][-1], y= 2:1 , labels = round(dat.tmp0.low[dat.tmp0.low$fold == 2 ,c("pred")][-1],2),
     pos = 3)
text(x = dat.tmp0.low[dat.tmp0.low$fold == 2 ,c("pred")][1], y= 3 , labels = round(dat.tmp0.low[dat.tmp0.low$fold == 2 ,c("pred")][1],2),
     pos = 1)

plot(c(3:1) ~ dat.tmp0.low$pred[dat.tmp0.low$fold == 3] , main = "Voteshare 30%" ,cex.main = 1.25 , #ylim = c(0,.8), 
     xlim = c(0,1), type = 'n',xlab = "t", ylab = "", yaxt = "n", xaxt = 'n', frame = F)
points(c(3:1) ~ dat.tmp0.low$pred[dat.tmp0.low$fold == 3] , pch = 19)
axis(1, at = seq(0,1,0.2), labels =  seq(0,1,0.2), cex.axis = 1.5,
     col = "white")
segments(x0 = dat.tmp0.low$lower95[dat.tmp0.low$fold == 3], y0 = c(3:1), 
         x1 = dat.tmp0.low$upper95[dat.tmp0.low$fold == 3], y1 = c(3:1))

text(x = dat.tmp0.low[dat.tmp0.low$fold == 3 ,c("pred")][-1], y= 2:1 , labels = round(dat.tmp0.low[dat.tmp0.low$fold == 3 ,c("pred")][-1],2),
     pos = 3)
text(x = dat.tmp0.low[dat.tmp0.low$fold == 3 ,c("pred")][1], y= 3 , labels = round(dat.tmp0.low[dat.tmp0.low$fold == 3 ,c("pred")][1],2),
     pos = 1)
mtext(side=1, line=3, "Pr(Nomination)",font=2,  las = 1,srt = 90, cex = 1.25)

plot(c(3:1) ~ dat.tmp0.low$pred[dat.tmp0.low$fold == 4] , main = "Voteshare 45%" ,cex.main = 1.25 , #ylim = c(0,.8), 
     xlim = c(-0.05,1), type = 'n',xlab = "t", ylab = "", yaxt = "n", xaxt = 'n', frame = F)
points(c(3:1) ~ dat.tmp0.low$pred[dat.tmp0.low$fold == 4] , pch = 19)
axis(1, at = seq(0,1,0.2), labels =  seq(0,1,0.2), cex.axis = 1.5,
     col = "white")
segments(x0 = dat.tmp0.low$lower95[dat.tmp0.low$fold == 4], y0 = c(3:1), 
         x1 = dat.tmp0.low$upper95[dat.tmp0.low$fold == 4], y1 = c(3:1))
text(x = dat.tmp0.low[dat.tmp0.low$fold == 4 ,c("pred")][-1], y= 2:1 , labels = round(dat.tmp0.low[dat.tmp0.low$fold == 4,c("pred")][-1],2),
     pos = 3)
text(x = dat.tmp0.low[dat.tmp0.low$fold == 4 ,c("pred")][1], y= 3 , labels = round(dat.tmp0.low[dat.tmp0.low$fold == 4 ,c("pred")][1],2),
     pos = 1)




##############################################################################################
# Figure 3 Predicted Probabilitiy of District Nomination Conditional on Deviation from Median#
##############################################################################################

mod <- rr.nom.3
require(MASS);require(ggplot2)
set.seed(1104)
n.sim = 1000
b.sim <- mvrnorm(n.sim, coef(mod), vcov(mod))

x.cond <- c(min(data.frame(model.matrix(mod))[['exp_deviation']] ),
            mean(data.frame(model.matrix(mod))[['exp_deviation']] ),
            max(data.frame(model.matrix(mod))[['exp_deviation']] ))


dat.tmp0.low <- c()
for(ii in 1:length(x.cond)){
  tmp <- data.frame(rbind(cbind(exp_voteshare_cand = mean(data.frame(model.matrix(mod))$exp_voteshare_cand),
                                age_rel = rnorm(data.frame(model.matrix(mod))[['age_rel']])[1] ,
                                sen_rel = 0,
                                borninku = 0,
                                mainfaction = 0,
                                exp_deviation = x.cond[[ii]],
                                geo_area = 0,
                                interaction = 0,
                                fold = ii),
                          cbind(exp_voteshare_cand = mean(data.frame(model.matrix(mod))$exp_voteshare_cand),
                                age_rel = rnorm(data.frame(model.matrix(mod))[['age_rel']])[1] ,
                                sen_rel = 0,
                                borninku = 0,
                                mainfaction = 0,
                                exp_deviation = mean(data.frame(model.matrix(mod))[['exp_deviation']]),
                                geo_area = 0,
                                interaction = mean(data.frame(model.matrix(mod))[['exp_deviation']]) * 0,
                                fold = ii),
                          cbind(exp_voteshare_cand = mean(data.frame(model.matrix(mod))$exp_voteshare_cand),
                                age_rel = rnorm(data.frame(model.matrix(mod))[['age_rel']])[1] ,
                                sen_rel = 0,
                                borninku = 0,
                                mainfaction = 0,
                                exp_deviation = mean(data.frame(model.matrix(mod))[['exp_deviation']]),
                                geo_area = 0,
                                interaction = mean(data.frame(model.matrix(mod))[['exp_deviation']]) *0,
                                fold = ii),
                          cbind(exp_voteshare_cand = mean(data.frame(model.matrix(mod))$exp_voteshare_cand),
                                age_rel = rnorm(data.frame(model.matrix(mod))[['age_rel']])[1] ,
                                sen_rel = 0,
                                borninku = 0,
                                mainfaction = 0,
                                exp_deviation = x.cond[[ii]],
                                geo_area = 0.5,
                                interaction = x.cond[[ii]] * 0.5,
                                fold = ii),
                          cbind(exp_voteshare_cand = mean(data.frame(model.matrix(mod))$exp_voteshare_cand),
                                age_rel = rnorm(data.frame(model.matrix(mod))[['age_rel']])[1] ,
                                sen_rel = 0,
                                borninku = 0,
                                mainfaction = 0,
                                exp_deviation = mean(data.frame(model.matrix(mod))[['exp_deviation']]),
                                geo_area = .5,
                                interaction = mean(data.frame(model.matrix(mod))[['exp_deviation']]) * .5,
                                fold = ii),
                          cbind(exp_voteshare_cand = mean(data.frame(model.matrix(mod))$exp_voteshare_cand),
                                age_rel = rnorm(data.frame(model.matrix(mod))[['age_rel']])[1] ,
                                sen_rel = 0,
                                borninku = 0,
                                mainfaction = 0,
                                exp_deviation = mean(data.frame(model.matrix(mod))[['exp_deviation']]),
                                geo_area = .5,
                                interaction = mean(data.frame(model.matrix(mod))[['exp_deviation']]) * .5,
                                fold = ii),
                          cbind(exp_voteshare_cand = mean(data.frame(model.matrix(mod))$exp_voteshare_cand),
                                age_rel = rnorm(data.frame(model.matrix(mod))[['age_rel']])[1] ,
                                sen_rel = 0,
                                borninku = 0,
                                mainfaction = 0,
                                exp_deviation = x.cond[[ii]],
                                geo_area = 1,
                                interaction = 1* x.cond[[ii]],
                                fold = ii),
                          cbind(exp_voteshare_cand = mean(data.frame(model.matrix(mod))$exp_voteshare_cand),
                                age_rel = rnorm(data.frame(model.matrix(mod))[['age_rel']])[1] ,
                                sen_rel = 0,
                                borninku = 0,
                                mainfaction = 0,
                                exp_deviation = mean(data.frame(model.matrix(mod))[['exp_deviation']]),
                                geo_area = 1,
                                interaction = mean(data.frame(model.matrix(mod))[['exp_deviation']]) * 1,
                                fold = ii),
                          cbind(exp_voteshare_cand = mean(data.frame(model.matrix(mod))$exp_voteshare_cand),
                                age_rel = rnorm(data.frame(model.matrix(mod))[['age_rel']])[1] ,
                                sen_rel = 0,
                                borninku = 0,
                                mainfaction = 0,
                                exp_deviation = mean(data.frame(model.matrix(mod))[['exp_deviation']]),
                                geo_area = 1,
                                interaction = mean(data.frame(model.matrix(mod))[['exp_deviation']]) * 1,
                                fold = ii)  
  ))
  dat.tmp0.low <- rbind(dat.tmp0.low, tmp)
}
lin.preds0.low <- as.matrix(dat.tmp0.low[,-9]) %*% t(b.sim)



sums.min.0 <- apply(lin.preds0.low[c(1:3),], 2,function(x) sum(exp(x), na.rm = T) )
sums.min.05 <- apply(lin.preds0.low[c(4:6),], 2,function(x) sum(exp(x), na.rm = T) )
sums.min.1 <- apply(lin.preds0.low[c(7:9),], 2,function(x) sum(exp(x), na.rm = T) )
sums.mean.0 <- apply(lin.preds0.low[c(10:12),], 2,function(x) sum(exp(x), na.rm = T) )
sums.mean.05 <- apply(lin.preds0.low[c(13:15),], 2,function(x) sum(exp(x), na.rm = T) )
sums.mean.1 <- apply(lin.preds0.low[c(16:18),], 2,function(x) sum(exp(x), na.rm = T) )
sums.max.0 <- apply(lin.preds0.low[c(19:21),], 2,function(x) sum(exp(x), na.rm = T) )
sums.max.05 <- apply(lin.preds0.low[c(22:24),], 2,function(x) sum(exp(x), na.rm = T) )
sums.max.1 <- apply(lin.preds0.low[c(25:27),], 2,function(x) sum(exp(x), na.rm = T) )


probs.min.0 <- as.data.frame(exp(lin.preds0.low[1:3,1])/sums.min.0[1])
probs.min.05 <- as.data.frame(exp(lin.preds0.low[4:6,1])/sums.min.05[1])
probs.min.1 <- as.data.frame(exp(lin.preds0.low[7:9,1])/sums.min.1[1])

probs.mean.0 <- as.data.frame(exp(lin.preds0.low[10:12,1])/sums.mean.0[1])
probs.mean.05 <- as.data.frame(exp(lin.preds0.low[13:15,1])/sums.mean.05[1])
probs.mean.1 <- as.data.frame(exp(lin.preds0.low[16:18,1])/sums.mean.1[1])

probs.max.0 <- as.data.frame(exp(lin.preds0.low[19:21,1])/sums.max.0[1])
probs.max.05 <- as.data.frame(exp(lin.preds0.low[22:24,1])/sums.max.05[1])
probs.max.1 <- as.data.frame(exp(lin.preds0.low[25:27,1])/sums.max.1[1])

for(cc in 2:n.sim){
  probs.min.0 <- as.data.frame(cbind(probs.min.0, (exp(lin.preds0.low[1:3,cc])/sums.min.0[cc])))
  probs.min.05 <- as.data.frame(cbind(probs.min.05, (exp(lin.preds0.low[4:6,cc])/sums.min.05[cc])))
  probs.min.1 <- as.data.frame(cbind(probs.min.1, (exp(lin.preds0.low[7:9,cc])/sums.min.1[cc])))
  probs.mean.0 <- as.data.frame(cbind(probs.mean.0, (exp(lin.preds0.low[10:12,cc])/sums.mean.0[cc])))
  probs.mean.05 <- as.data.frame(cbind(probs.mean.05, (exp(lin.preds0.low[13:15,cc])/sums.mean.05[cc])))
  probs.mean.1 <- as.data.frame(cbind(probs.mean.1, (exp(lin.preds0.low[16:18,cc])/sums.mean.1[cc])))
  probs.max.0 <- as.data.frame(cbind(probs.max.0, (exp(lin.preds0.low[19:21,cc])/sums.max.0[cc])))
  probs.max.05 <- as.data.frame(cbind(probs.max.05, (exp(lin.preds0.low[22:24,cc])/sums.max.05[cc])))
  probs.max.1 <- as.data.frame(cbind(probs.max.1, (exp(lin.preds0.low[25:27,cc])/sums.max.1[cc])))
  
}

dat.tmp0.low$pred <- NA
dat.tmp0.low$pred[1:3] <- apply(probs.min.0, 1,mean, na.rm = T) 
dat.tmp0.low$pred[4:6] <- apply(probs.min.05, 1,mean, na.rm = T) 
dat.tmp0.low$pred[7:9] <- apply(probs.min.1, 1,mean, na.rm = T) 
dat.tmp0.low$pred[10:12] <- apply(probs.mean.0, 1,mean, na.rm = T) 
dat.tmp0.low$pred[13:15] <- apply(probs.mean.05, 1,mean, na.rm = T) 
dat.tmp0.low$pred[16:18] <- apply(probs.mean.1, 1,mean, na.rm = T) 
dat.tmp0.low$pred[19:21] <- apply(probs.max.0, 1,mean, na.rm = T) 
dat.tmp0.low$pred[22:24] <- apply(probs.max.05, 1,mean, na.rm = T) 
dat.tmp0.low$pred[25:27] <- apply(probs.max.1, 1,mean, na.rm = T) 

dat.tmp0.low$lower95 <- NA
dat.tmp0.low$lower95[1:3] <- apply(probs.min.0, 1,quantile, probs = 0.025, na.rm = T) 
dat.tmp0.low$lower95[4:6] <- apply(probs.min.05, 1,quantile, probs = 0.025, na.rm = T) 
dat.tmp0.low$lower95[7:9] <- apply(probs.min.1, 1,quantile, probs = 0.025, na.rm = T) 
dat.tmp0.low$lower95[10:12] <- apply(probs.mean.0, 1,quantile, probs = 0.025, na.rm = T) 
dat.tmp0.low$lower95[13:15] <- apply(probs.mean.05, 1,quantile, probs = 0.025, na.rm = T) 
dat.tmp0.low$lower95[16:18] <- apply(probs.mean.1, 1,quantile, probs = 0.025, na.rm = T) 
dat.tmp0.low$lower95[19:21] <- apply(probs.max.0, 1,quantile, probs = 0.025, na.rm = T) 
dat.tmp0.low$lower95[22:24] <- apply(probs.max.05, 1,quantile, probs = 0.025, na.rm = T) 
dat.tmp0.low$lower95[25:27] <- apply(probs.max.1, 1,quantile, probs = 0.025, na.rm = T) 

dat.tmp0.low$upper95 <- NA
dat.tmp0.low$upper95[1:3] <- apply(probs.min.0, 1,quantile, probs = 0.975, na.rm = T) 
dat.tmp0.low$upper95[4:6] <- apply(probs.min.05, 1,quantile, probs = 0.975, na.rm = T) 
dat.tmp0.low$upper95[7:9] <- apply(probs.min.1, 1,quantile, probs = 0.975, na.rm = T) 
dat.tmp0.low$upper95[10:12] <- apply(probs.mean.0, 1,quantile, probs = 0.975, na.rm = T) 
dat.tmp0.low$upper95[13:15] <- apply(probs.mean.05, 1,quantile, probs = 0.975, na.rm = T) 
dat.tmp0.low$upper95[16:18] <- apply(probs.mean.1, 1,quantile, probs = 0.975, na.rm = T) 
dat.tmp0.low$upper95[19:21] <- apply(probs.max.0, 1,quantile, probs = 0.975, na.rm = T) 
dat.tmp0.low$upper95[22:24] <- apply(probs.max.05, 1,quantile, probs = 0.975, na.rm = T) 
dat.tmp0.low$upper95[25:27] <- apply(probs.max.1, 1,quantile, probs = 0.975, na.rm = T) 

dat.tmp0.low$lower90 <- NA
dat.tmp0.low$lower90[1:3] <- apply(probs.min.0, 1,quantile, probs = 0.05, na.rm = T) 
dat.tmp0.low$lower90[4:6] <- apply(probs.min.05, 1,quantile, probs = 0.05, na.rm = T) 
dat.tmp0.low$lower90[7:9] <- apply(probs.min.1, 1,quantile, probs = 0.05, na.rm = T) 
dat.tmp0.low$lower90[10:12] <- apply(probs.mean.0, 1,quantile, probs = 0.05, na.rm = T) 
dat.tmp0.low$lower90[13:15] <- apply(probs.mean.05, 1,quantile, probs = 0.05, na.rm = T) 
dat.tmp0.low$lower90[16:18] <- apply(probs.mean.1, 1,quantile, probs = 0.05, na.rm = T) 
dat.tmp0.low$lower90[19:21] <- apply(probs.max.0, 1,quantile, probs = 0.05, na.rm = T) 
dat.tmp0.low$lower90[22:24] <- apply(probs.max.05, 1,quantile, probs = 0.05, na.rm = T) 
dat.tmp0.low$lower90[25:27] <- apply(probs.max.1, 1,quantile, probs = 0.05, na.rm = T) 

dat.tmp0.low$upper90 <- NA
dat.tmp0.low$upper90[1:3] <- apply(probs.min.0, 1,quantile, probs = 0.95, na.rm = T) 
dat.tmp0.low$upper90[4:6] <- apply(probs.min.05, 1,quantile, probs = 0.95, na.rm = T) 
dat.tmp0.low$upper90[7:9] <- apply(probs.min.1, 1,quantile, probs = 0.95, na.rm = T) 
dat.tmp0.low$upper90[10:12] <- apply(probs.mean.0, 1,quantile, probs = 0.95, na.rm = T) 
dat.tmp0.low$upper90[13:15] <- apply(probs.mean.05, 1,quantile, probs = 0.95, na.rm = T) 
dat.tmp0.low$upper90[16:18] <- apply(probs.mean.1, 1,quantile, probs = 0.95, na.rm = T) 
dat.tmp0.low$upper90[19:21] <- apply(probs.max.0, 1,quantile, probs = 0.95, na.rm = T) 
dat.tmp0.low$upper90[22:24] <- apply(probs.max.05, 1,quantile, probs = 0.95, na.rm = T) 
dat.tmp0.low$upper90[25:27] <- apply(probs.max.1, 1,quantile, probs = 0.95, na.rm = T) 

library(ggplot2)
dat.tmp0.low$cand <- rep(c("Candidate 1", 
                           "Candidate 2\n Ideol. = Mean", 
                           "Candidate 3\n Ideol. = Mean"),9)
dat.tmp0.low$facet1 <- rep(c("C1's Deviation: Min","C1's Deviation: Mean","C1's Deviation: Max"), 
                           each = 9)
dat.tmp0.low$facet2 <- rep(c("New Area = 0","New Area = 0.5","New Area = 1"), each = 3)

dat.tmp0.low$cand_f <- factor(dat.tmp0.low$cand, levels = c( "Candidate 3\n Ideol. = Mean",
                                                             "Candidate 2\n Ideol. = Mean",
                                                             "Candidate 1"))
dat.tmp0.low$facet1_f <- factor(dat.tmp0.low$facet1, levels = c( "C1's Deviation: Min",
                                                                 "C1's Deviation: Mean",
                                                                 "C1's Deviation: Max"))

p = ggplot()
p = p + geom_pointrange(data = dat.tmp0.low[dat.tmp0.low$geo_area != 1 & dat.tmp0.low$facet1 != "C1's Deviation: Mean",], aes(x = cand_f, y = pred, ymin = lower95, ymax = upper95), lty = 2)
p = p + geom_pointrange(data = dat.tmp0.low[dat.tmp0.low$geo_area != 1 & dat.tmp0.low$facet1 != "C1's Deviation: Mean",], aes(x = cand_f, y = pred, ymin = lower90, ymax = upper90))
p = p + facet_grid(facet2 ~ facet1_f)
p = p + coord_flip() + xlab("") + ylab("Pr(Nomination)")
p = p + theme(strip.background=element_rect(fill="white", colour = "black"),
              strip.text = element_text(size=14, face = "bold"),
              plot.background = element_rect(fill = "white"),
              panel.background= element_rect(fill = "white", colour = "black"),
              #axis.ticks.y = element_line(color = "white"),
              axis.title = element_text(face = "bold",size = 14),
              axis.text = element_text( size = 12 , color = "black", face = "bold"))
p

#######################################
# Nomination Failure
#######################################

dat <- read.dta("coordination2.dta")
dat$failure_bin <- ifelse(dat$failure >= 1,1,0)

summary(fail.1 <- glm(failure_bin ~ -1 + pref_num_cand + region_pr_m, data = dat, family = binomial(link="logit")))
summary(fail.2 <- glm(failure_bin ~ -1 + pref_num_inc + region_pr_m, data = dat, family = binomial(link="logit")))
fail.1.se <- sqrt(diag(cluster.vcov(fail.1, ~ pr_block)))
fail.2.se <- sqrt(diag(cluster.vcov(fail.2, ~ pr_block)))

#######################################
# Table 12
#######################################
stargazer(list(fail.1, fail.2,fail.1, fail.2) ,
          se = list(fail.1.se, fail.2.se, NULL,NULL))
